original_world_happiness_2021 <- read.csv("/Users/karissatschida/Documents/2021-2022/Spring 2022/ISyE312/Project/final project/original_world_happiness_2021.csv")
world_happiness_2021 <- original_world_happiness_2021 %>%
summarize(country_name, regional_indicator, ladder_score, logged_gdp_per_capita, social_support, healthy_life_expectancy, freedom_to_make_life_choices, generosity, perceptions_of_corruption)
Link to Kaggle Dataset: https://www.kaggle.com/datasets/ajaypalsinghlo/world-happiness-report-2021
Link to World Happiness Report 2021 Webpage: https://worldhappiness.report/ed/2021/
Link to World Happiness Report 2021 PDF: https://happiness-report.s3.amazonaws.com/2021/WHR+21.pdf
plot(world_happiness_2021$ladder_score, world_happiness_2021$logged_gdp_per_capita,
xlab = "Ladder Score",
ylab = "Logged GDP per Capita",
main = "GDP vs. Ladder Score")
plot(world_happiness_2021$ladder_score, world_happiness_2021$social_support,
xlab = "Ladder Score",
ylab = "Social Support",
main = "Social Support vs. Ladder Score")
plot(world_happiness_2021$ladder_score, world_happiness_2021$healthy_life_expectancy,
xlab = "Ladder Score",
ylab = "Healthy Life Expectancy",
main = "Healthy Life Expectancy vs. Ladder Score")
plot(world_happiness_2021$ladder_score, world_happiness_2021$freedom_to_make_life_choices,
xlab = "Ladder Score",
ylab = "Freedom to Make Life Choices",
main = "Freedom to Make Life Choices vs. Ladder Score")
plot(world_happiness_2021$ladder_score, world_happiness_2021$generosity,
xlab = "Ladder Score",
ylab = "Generosity",
main = "Generosity vs. Ladder Score")
plot(world_happiness_2021$ladder_score, world_happiness_2021$perceptions_of_corruption,
xlab = "Ladder Score",
ylab = "Perceptions of Corruption",
main = "Perceptions of Corruption vs. Ladder Score")
num_world_happiness_2021 <- world_happiness_2021 %>%
summarize(ladder_score, logged_gdp_per_capita, social_support, healthy_life_expectancy, freedom_to_make_life_choices, generosity, perceptions_of_corruption)
cor(num_world_happiness_2021, method = "pearson", use = "complete.obs")
## ladder_score logged_gdp_per_capita social_support
## ladder_score 1.00000000 0.7897597 0.7568876
## logged_gdp_per_capita 0.78975970 1.0000000 0.7852987
## social_support 0.75688765 0.7852987 1.0000000
## healthy_life_expectancy 0.76809946 0.8594606 0.7232561
## freedom_to_make_life_choices 0.60775307 0.4323235 0.4829298
## generosity -0.01779928 -0.1992864 -0.1149459
## perceptions_of_corruption -0.42114000 -0.3423374 -0.2032070
## healthy_life_expectancy
## ladder_score 0.7680995
## logged_gdp_per_capita 0.8594606
## social_support 0.7232561
## healthy_life_expectancy 1.0000000
## freedom_to_make_life_choices 0.4614939
## generosity -0.1617503
## perceptions_of_corruption -0.3643735
## freedom_to_make_life_choices generosity
## ladder_score 0.6077531 -0.01779928
## logged_gdp_per_capita 0.4323235 -0.19928640
## social_support 0.4829298 -0.11494585
## healthy_life_expectancy 0.4614939 -0.16175028
## freedom_to_make_life_choices 1.0000000 0.16943737
## generosity 0.1694374 1.00000000
## perceptions_of_corruption -0.4013630 -0.16396173
## perceptions_of_corruption
## ladder_score -0.4211400
## logged_gdp_per_capita -0.3423374
## social_support -0.2032070
## healthy_life_expectancy -0.3643735
## freedom_to_make_life_choices -0.4013630
## generosity -0.1639617
## perceptions_of_corruption 1.0000000
corrplot(cor(num_world_happiness_2021))
happiness_model <- lm(ladder_score ~ regional_indicator + logged_gdp_per_capita + social_support + healthy_life_expectancy + freedom_to_make_life_choices + generosity + perceptions_of_corruption, data = world_happiness_2021)
anova(happiness_model)
## Analysis of Variance Table
##
## Response: ladder_score
## Df Sum Sq Mean Sq F value Pr(>F)
## regional_indicator 9 106.053 11.7837 48.0061 < 2.2e-16 ***
## logged_gdp_per_capita 1 18.245 18.2452 74.3300 1.716e-14 ***
## social_support 1 4.328 4.3284 17.6338 4.872e-05 ***
## healthy_life_expectancy 1 0.943 0.9432 3.8426 0.05206 .
## freedom_to_make_life_choices 1 7.511 7.5112 30.6000 1.621e-07 ***
## generosity 1 0.687 0.6873 2.8002 0.09661 .
## perceptions_of_corruption 1 0.275 0.2752 1.1212 0.29159
## Residuals 133 32.646 0.2455
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
yhat <- happiness_model$fitted.values
res <- happiness_model$residuals
t <- rstudent(happiness_model)
stdres <- rstandard(happiness_model)
par(mfrow=c(1,2))
plot(yhat, res, ylab="Residuals", xlab="Fitted Value", main="Residual vs Fitted")
abline(0, 0)
plot(yhat, t, ylab="Studentized", xlab="Fitted Value", main="Studentized Residual vs Fitted")
abline(0, 0)
par(mfrow=c(1,2))
qqnorm(res, ylab="Residuals", xlab="Normal Scores",main="Residual Q-Q Plot")
qqline(res)
qqnorm(t, ylab="Studentized Residuals", xlab="Normal Scores",main="Studentized Residual Q-Q Plot")
qqline(t)
Create a linear model.
mod <- lm(ladder_score ~ regional_indicator + logged_gdp_per_capita + social_support + healthy_life_expectancy + freedom_to_make_life_choices + generosity + perceptions_of_corruption, data = world_happiness_2021)
cooksd <- cooks.distance(mod)
Plot Cook’s Distance.
plot(cooksd,
xlab = "Index",
ylab = "Cook's Distance",
main = "1. Cook's Distance for Influential Observations")
Plot Cook’s Distance using the traditional cut-off point of 4/sample size (or 4/n).
n <- nrow(world_happiness_2021)
plot(cooksd,
xlab = "Index",
ylab = "Cook's Distance",
main = "2. Cooks Distance for Influential Observations",
sub = "cut-off point of 4/n represented by blue dashed line")
abline(h = 4/n, lty = 2, col = "blue")
Create a bar plot of Cook’s Distance to display the observations that are influential points of the fitted model.
ols_plot_cooksd_bar(mod)
Create a chart of Cook’s Distance to display the observations that are influential points of the fitted model.
ols_plot_cooksd_chart(mod)
As displayed above by the “2. Cook’s Distance for Influential Observations” plot, the “Cook’s D Bar Plot”, and the “Cook’s D Chart” above, there are 13 influential points in our data.
Remove the 13 outliers in the data set.
influential_obs <- as.numeric(names(cooksd)[(cooksd > 4/n)])
outliers_removed <- world_happiness_2021[-influential_obs, ]
nooutliers_happiness_model <- lm(ladder_score ~ regional_indicator + logged_gdp_per_capita + social_support + healthy_life_expectancy + freedom_to_make_life_choices + generosity + perceptions_of_corruption, data = outliers_removed)
anova(nooutliers_happiness_model)
## Analysis of Variance Table
##
## Response: ladder_score
## Df Sum Sq Mean Sq F value Pr(>F)
## regional_indicator 9 97.179 10.7977 65.7565 < 2.2e-16 ***
## logged_gdp_per_capita 1 14.037 14.0369 85.4823 1.075e-15 ***
## social_support 1 2.201 2.2013 13.4054 0.0003744 ***
## healthy_life_expectancy 1 0.419 0.4191 2.5524 0.1127593
## freedom_to_make_life_choices 1 9.105 9.1054 55.4508 1.606e-11 ***
## generosity 1 1.284 1.2839 7.8188 0.0060234 **
## perceptions_of_corruption 1 0.825 0.8254 5.0265 0.0267994 *
## Residuals 120 19.705 0.1642
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
yhat <- nooutliers_happiness_model$fitted.values
res <- nooutliers_happiness_model$residuals
t <- rstudent(nooutliers_happiness_model)
stdres <- rstandard(nooutliers_happiness_model)
par(mfrow=c(1,2))
plot(yhat, res, ylab="Residuals", xlab="Fitted Value", main="Residual vs Fitted")
abline(0, 0)
plot(yhat, t, ylab="Studentized", xlab="Fitted Value", main="Studentized Residual vs Fitted")
abline(0, 0)
par(mfrow=c(1,2))
qqnorm(res, ylab="Residuals", xlab="Normal Scores",main="Residual Q-Q Plot")
qqline(res)
qqnorm(t, ylab="Studentized Residuals", xlab="Normal Scores",main="Studentized Residual Q-Q Plot")
qqline(t)
13 outliers were removed from the data, resulting in a bit more linear distribution of the residuals on the Q-Q plots.
ggplot(world_happiness_2021, aes(x = regional_indicator, y = ladder_score)) +
geom_point(color = "blue", alpha = 0.5, size = 1) +
theme(axis.text.x = element_text(angle = 90)) +
xlab("Region") +
ylab("Ladder Score") +
ggtitle("Ladder Score vs. Country Region", subtitle = "How does the region of a country affect ladder score?")
ggplot(world_happiness_2021, aes(x = regional_indicator, y = social_support)) +
geom_point(color = "blue", alpha = 0.5, size = 1) +
theme(axis.text.x = element_text(angle = 90)) +
xlab("Region") +
ylab("Social Support") +
ggtitle("Social Support vs. Country Region", subtitle = "How does the region of a country affect social support?")
ggplot(world_happiness_2021, aes(x = regional_indicator, y = freedom_to_make_life_choices)) +
geom_point(color = "blue", alpha = 0.5, size = 1) +
theme(axis.text.x = element_text(angle = 90)) +
xlab("Region") +
ylab("Freedom to Make Life Choices") +
ggtitle("Freedom to Make Life Choices vs. Country Region", subtitle = "How does the region of a country affect the freedom to make life choices?")
Create a linear model where the dependent variable is the ladder score, and the independent variables are social support and regional indicator.
world_model <-lm(world_happiness_2021$ladder_score ~ world_happiness_2021$social_support + world_happiness_2021$regional_indicator, data = world_happiness_2021)
summary(world_model)
##
## Call:
## lm(formula = world_happiness_2021$ladder_score ~ world_happiness_2021$social_support +
## world_happiness_2021$regional_indicator, data = world_happiness_2021)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7096 -0.2835 0.0285 0.3640 1.4449
##
## Coefficients:
## Estimate
## (Intercept) 2.16448
## world_happiness_2021$social_support 4.30498
## world_happiness_2021$regional_indicatorCommonwealth of Independent States -0.45357
## world_happiness_2021$regional_indicatorEast Asia -0.05858
## world_happiness_2021$regional_indicatorLatin America and Caribbean 0.12954
## world_happiness_2021$regional_indicatorMiddle East and North Africa -0.37856
## world_happiness_2021$regional_indicatorNorth America and ANZ 0.94533
## world_happiness_2021$regional_indicatorSouth Asia -0.75086
## world_happiness_2021$regional_indicatorSoutheast Asia -0.28844
## world_happiness_2021$regional_indicatorSub-Saharan Africa -0.66950
## world_happiness_2021$regional_indicatorWestern Europe 0.81363
## Std. Error
## (Intercept) 0.55370
## world_happiness_2021$social_support 0.60313
## world_happiness_2021$regional_indicatorCommonwealth of Independent States 0.22071
## world_happiness_2021$regional_indicatorEast Asia 0.27821
## world_happiness_2021$regional_indicatorLatin America and Caribbean 0.19510
## world_happiness_2021$regional_indicatorMiddle East and North Africa 0.20779
## world_happiness_2021$regional_indicatorNorth America and ANZ 0.32622
## world_happiness_2021$regional_indicatorSouth Asia 0.28514
## world_happiness_2021$regional_indicatorSoutheast Asia 0.24448
## world_happiness_2021$regional_indicatorSub-Saharan Africa 0.20700
## world_happiness_2021$regional_indicatorWestern Europe 0.19152
## t value
## (Intercept) 3.909
## world_happiness_2021$social_support 7.138
## world_happiness_2021$regional_indicatorCommonwealth of Independent States -2.055
## world_happiness_2021$regional_indicatorEast Asia -0.211
## world_happiness_2021$regional_indicatorLatin America and Caribbean 0.664
## world_happiness_2021$regional_indicatorMiddle East and North Africa -1.822
## world_happiness_2021$regional_indicatorNorth America and ANZ 2.898
## world_happiness_2021$regional_indicatorSouth Asia -2.633
## world_happiness_2021$regional_indicatorSoutheast Asia -1.180
## world_happiness_2021$regional_indicatorSub-Saharan Africa -3.234
## world_happiness_2021$regional_indicatorWestern Europe 4.248
## Pr(>|t|)
## (Intercept) 0.000145
## world_happiness_2021$social_support 4.92e-11
## world_happiness_2021$regional_indicatorCommonwealth of Independent States 0.041757
## world_happiness_2021$regional_indicatorEast Asia 0.833551
## world_happiness_2021$regional_indicatorLatin America and Caribbean 0.507797
## world_happiness_2021$regional_indicatorMiddle East and North Africa 0.070642
## world_happiness_2021$regional_indicatorNorth America and ANZ 0.004372
## world_happiness_2021$regional_indicatorSouth Asia 0.009421
## world_happiness_2021$regional_indicatorSoutheast Asia 0.240106
## world_happiness_2021$regional_indicatorSub-Saharan Africa 0.001526
## world_happiness_2021$regional_indicatorWestern Europe 3.94e-05
##
## (Intercept) ***
## world_happiness_2021$social_support ***
## world_happiness_2021$regional_indicatorCommonwealth of Independent States *
## world_happiness_2021$regional_indicatorEast Asia
## world_happiness_2021$regional_indicatorLatin America and Caribbean
## world_happiness_2021$regional_indicatorMiddle East and North Africa .
## world_happiness_2021$regional_indicatorNorth America and ANZ **
## world_happiness_2021$regional_indicatorSouth Asia **
## world_happiness_2021$regional_indicatorSoutheast Asia
## world_happiness_2021$regional_indicatorSub-Saharan Africa **
## world_happiness_2021$regional_indicatorWestern Europe ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5849 on 138 degrees of freedom
## Multiple R-squared: 0.7234, Adjusted R-squared: 0.7034
## F-statistic: 36.1 on 10 and 138 DF, p-value: < 2.2e-16
Extract the coefficients and R^2 values from the summary to find which region has the highest social support coefficient.
coeffs = coefficients(world_model)
coeffs
## (Intercept)
## 2.16447692
## world_happiness_2021$social_support
## 4.30497761
## world_happiness_2021$regional_indicatorCommonwealth of Independent States
## -0.45356989
## world_happiness_2021$regional_indicatorEast Asia
## -0.05857683
## world_happiness_2021$regional_indicatorLatin America and Caribbean
## 0.12954437
## world_happiness_2021$regional_indicatorMiddle East and North Africa
## -0.37856495
## world_happiness_2021$regional_indicatorNorth America and ANZ
## 0.94532647
## world_happiness_2021$regional_indicatorSouth Asia
## -0.75086403
## world_happiness_2021$regional_indicatorSoutheast Asia
## -0.28843800
## world_happiness_2021$regional_indicatorSub-Saharan Africa
## -0.66949785
## world_happiness_2021$regional_indicatorWestern Europe
## 0.81362831
summary(world_model)$r.squared
## [1] 0.7234259
Western Europe has the highest social support coefficient.
To further explore this, test a multi-linear regression model again social support, ladder score, and freedom to make life choices in Western Europe to see how those factor change the ladder score.
weurope <- subset(world_happiness_2021, regional_indicator == "Western Europe")
pairs(~weurope$ladder_score+weurope$social_support+weurope$freedom_to_make_life_choices,main="Simple Scatterplot Matrix")
Create a linear regression model and display results.
weurope_model_generosity <-lm(ladder_score~social_support+freedom_to_make_life_choices+generosity, data=weurope)
summary(weurope_model_generosity)
##
## Call:
## lm(formula = ladder_score ~ social_support + freedom_to_make_life_choices +
## generosity, data = weurope)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6697 -0.1589 0.1042 0.2175 0.4807
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.0271 1.7891 -1.133 0.27294
## social_support 8.1134 2.4618 3.296 0.00427 **
## freedom_to_make_life_choices 1.7743 1.3218 1.342 0.19716
## generosity 0.3631 0.7601 0.478 0.63895
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3668 on 17 degrees of freedom
## Multiple R-squared: 0.7347, Adjusted R-squared: 0.6878
## F-statistic: 15.69 on 3 and 17 DF, p-value: 3.792e-05
Test if generosity is significant or insignificant via R^2.
First, examine an ANOVA table.
anova(weurope_model_generosity)
## Analysis of Variance Table
##
## Response: ladder_score
## Df Sum Sq Mean Sq F value Pr(>F)
## social_support 1 5.9202 5.9202 44.0015 4.229e-06 ***
## freedom_to_make_life_choices 1 0.3822 0.3822 2.8409 0.1102
## generosity 1 0.0307 0.0307 0.2282 0.6390
## Residuals 17 2.2873 0.1345
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Next, calculate R^2 and the adjusted R^2 values using the values from the ANOVA table.
R^2 = 1 - (SS(Res) / SS(T))
adjusted R^2 = 1 - SS(Res)/(n-p) / SS(T)/(n-1)
rsq1 <- 1 - ((2.2873) / (5.92+0.3822+0.0307+2.2873))
rsq1
## [1] 0.7346581
adj_rsq1 <- 1 - (2.2873 / 17) / ((5.92+0.3822+0.0307+2.2873) / 20)
adj_rsq1
## [1] 0.6878331
Finally, test for the significance of regression coefficient (x3 = generosity) via a hypothesis test.
H0: B3 = 0
H1: B3 <=> 0 (testing whether B3 is insignificant in regression equation)
to = B3 / se(B3) = 0.3631 / sqrt(sigmsq*vif(generosity))
sigmasq <- (sigma(weurope_model_generosity))^2
sigmasq
## [1] 0.1345448
vif <- vif(weurope_model_generosity)
vif
## social_support freedom_to_make_life_choices
## 2.120312 2.524734
## generosity
## 1.570089
t0 <- 0.3631 / (sqrt(0.1345448 * 1.570089))
t0
## [1] 0.7900065
t_(alpha/2, n-3) = t_(0.005, 17) = 2.898
t0 = 0.7900065
Since t0 < t_(alpha/2, n-3), we fail to reject H0. Therefore, there is not any strong evidence that there is a correlation between generosity and ladder score.
To see how the generosity of a country affect their perceptions of corruption, we will perform a simple linear regression analysis where the independent variable is generosity (X) and the perception of corruption is the dependent variable (Y).
genper_model <-lm(perceptions_of_corruption ~ generosity, data = world_happiness_2021)
summary(genper_model)
##
## Call:
## lm(formula = perceptions_of_corruption ~ generosity, data = world_happiness_2021)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64601 -0.05258 0.04683 0.11089 0.24822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.72450 0.01461 49.600 <2e-16 ***
## generosity -0.19505 0.09679 -2.015 0.0457 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1774 on 147 degrees of freedom
## Multiple R-squared: 0.02688, Adjusted R-squared: 0.02026
## F-statistic: 4.061 on 1 and 147 DF, p-value: 0.04571
y = -0.19505x + 0.72450
First, we will conduct a t test to determine whether or not there is a linear association between the two variables here as the alpha risk value of 0.10.
alpha <- 0.1
y <- length(world_happiness_2021$perceptions_of_corruption)
x <- length(unique(world_happiness_2021$generosity))
F.test.world_happiness_2021 <- qf(1 - alpha, x - 1, y - x)
F.test.world_happiness_2021
## [1] 1.663497
e_df <- world_happiness_2021 %>%
mutate(y_i = perceptions_of_corruption) %>%
mutate(x = generosity) %>%
mutate(y_hat = -0.19505*x + 0.72450) %>%
mutate(e_i = y_i - y_hat) %>%
summarize(x, y_i, y_hat, e_i)
e_i_sqrd <- e_df$e_i^2
sum_residuals <- sum(e_i_sqrd)
sum_residuals
## [1] 4.626261
S_xx = (sum((world_happiness_2021$generosity)^2)) - (((sum(world_happiness_2021$generosity))^2) / (149-2))
se_B1 = sqrt((sum_residuals / (149-2)) / (S_xx))
se_B1
## [1] 0.09679816
Testing Significance of Regression
Alternatives:
Ho: β1 = 0
Ha: β1 <=> 0
t_0 = (-0.19505 + 0) / (0.09679816)
t_0
## [1] -2.015018
t_(alpha/2, n-2) = t_(0.005, 147) = 1.655285
Since |-2.015018| > 1.655285, we reject the null hypothesis, and can we therefore say that there is a linear relationship between generosity and perceptions of cor
As shown in the summary of the genper_model model above, the β0 value 0.72450, which means that when generosity (x) is 0, the perception of corruption (y) is expected to be 0.72450. We also see that the slope, β1, is -0.19505. This means that the perception of corruption (y) is expected to decrease by -0.19505 for each 1 unit of increase in generosity (x).
3 different dfs -> instead of generosity change to factors we want look at, look at beta values to understand correlation
weurope <- subset(world_happiness_2021, regional_indicator == "Western Europe")
weurope_all_model <-lm(ladder_score ~ logged_gdp_per_capita + social_support + healthy_life_expectancy + freedom_to_make_life_choices + generosity + perceptions_of_corruption, data = weurope)
summary(weurope_all_model)
##
## Call:
## lm(formula = ladder_score ~ logged_gdp_per_capita + social_support +
## healthy_life_expectancy + freedom_to_make_life_choices +
## generosity + perceptions_of_corruption, data = weurope)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5306 -0.1438 -0.0051 0.1730 0.5004
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.08296 7.64206 -0.011 0.99149
## logged_gdp_per_capita 0.37140 0.30832 1.205 0.24833
## social_support 6.57583 2.13144 3.085 0.00807 **
## healthy_life_expectancy -0.03521 0.09279 -0.379 0.71008
## freedom_to_make_life_choices 0.07100 1.24412 0.057 0.95530
## generosity 0.17723 0.64510 0.275 0.78754
## perceptions_of_corruption -1.00238 0.47709 -2.101 0.05423 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3082 on 14 degrees of freedom
## Multiple R-squared: 0.8458, Adjusted R-squared: 0.7797
## F-statistic: 12.8 on 6 and 14 DF, p-value: 5.594e-05
Western Europe
Logged GDP per Capita INCREASES by 1; Ladder Score INCREASES by 0.37140
Social Support INCREASES by 1; Ladder Score INCREASES by 6.57583
Healthy Life Expectancy INCREASES by 1; Ladder Score DECREASES by 0.03521
Freedom to Make Life Choices INCREASES by 1; Ladder Score INCREASES by 0.07100
Generosity INCREASES by 1; Ladder Score INCREASES by 0.17723
Perceptions of Corruption INCREASES by 1; Ladder Score DECREASES by 1.00238
In Western Europe, the Social Score is what has the largest impact on the ladder score.
latamerica <- subset(world_happiness_2021, regional_indicator == "Latin America and Caribbean")
latamerica_all_model <-lm(ladder_score ~ logged_gdp_per_capita + social_support + healthy_life_expectancy + freedom_to_make_life_choices + generosity + perceptions_of_corruption, data = latamerica)
summary(latamerica_all_model)
##
## Call:
## lm(formula = ladder_score ~ logged_gdp_per_capita + social_support +
## healthy_life_expectancy + freedom_to_make_life_choices +
## generosity + perceptions_of_corruption, data = latamerica)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3983 -0.1434 -0.0241 0.1424 0.5490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.49714 2.60454 -2.111 0.054749 .
## logged_gdp_per_capita 0.02715 0.20799 0.131 0.898157
## social_support 0.48564 1.84805 0.263 0.796836
## healthy_life_expectancy 0.11484 0.04830 2.378 0.033452 *
## freedom_to_make_life_choices 4.13883 0.94984 4.357 0.000776 ***
## generosity 0.38819 0.94989 0.409 0.689438
## perceptions_of_corruption -0.47465 0.90271 -0.526 0.607874
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3006 on 13 degrees of freedom
## Multiple R-squared: 0.8714, Adjusted R-squared: 0.8121
## F-statistic: 14.69 on 6 and 13 DF, p-value: 4.078e-05
Latin America & Caribbean
Logged GDP per Capita INCREASES by 1; Ladder Score INCREASES by 0.02715
Social Support INCREASES by 1; Ladder Score INCREASES by 0.48564
Healthy Life Expectancy INCREASES by 1; Ladder Score INCREASES by 0.11484
Freedom to Make Life Choices INCREASES by 1; Ladder Score INCREASES by 4.13883
Generosity INCREASES by 1; Ladder Score INCREASES by 0.38819
Perceptions of Corruption INCREASES by 1; Ladder Score DECREASES by 0.47465
In Latin America & Caribbean, the Freedom to Make Life Choices is what has the largest impact on the ladder score.
ssafrica <- subset(world_happiness_2021, regional_indicator == "Sub-Saharan Africa")
ssafrica_all_model <-lm(ladder_score ~ logged_gdp_per_capita + social_support + healthy_life_expectancy + freedom_to_make_life_choices + generosity + perceptions_of_corruption, data = ssafrica)
summary(ssafrica_all_model)
##
## Call:
## lm(formula = ladder_score ~ logged_gdp_per_capita + social_support +
## healthy_life_expectancy + freedom_to_make_life_choices +
## generosity + perceptions_of_corruption, data = ssafrica)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.31892 -0.29280 0.03784 0.43805 0.98049
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.246384 2.074988 0.119 0.906
## logged_gdp_per_capita 0.343159 0.211825 1.620 0.116
## social_support -1.087070 1.731823 -0.628 0.535
## healthy_life_expectancy 0.007795 0.033919 0.230 0.820
## freedom_to_make_life_choices 0.757828 1.453040 0.522 0.606
## generosity 1.382039 1.006261 1.373 0.180
## perceptions_of_corruption 1.608688 1.148309 1.401 0.172
##
## Residual standard error: 0.6356 on 29 degrees of freedom
## Multiple R-squared: 0.2196, Adjusted R-squared: 0.05817
## F-statistic: 1.36 on 6 and 29 DF, p-value: 0.2636
Sub-Saharan Africa
Logged GDP per Capita INCREASES by 1; Ladder Score INCREASES by 0.343159
Social Support INCREASES by 1; Ladder Score DECREASES by 1.087070
Healthy Life Expectancy INCREASES by 1; Ladder Score INCREASES by 0.007795
Freedom to Make Life Choices INCREASES by 1; Ladder Score INCREASES by 0.757828
Generosity INCREASES by 1; Ladder Score INCREASES by 1.382039
Perceptions of Corruption INCREASES by 1; Ladder Score INCREASES by 1.608688
In Sub-Saharan Africa, the Perceptions of Corruption is what has the largest impact on the ladder score.
ladder_model <-lm(ladder_score ~ social_support + freedom_to_make_life_choices, data = world_happiness_2021)
summary(ladder_model)
##
## Call:
## lm(formula = ladder_score ~ social_support + freedom_to_make_life_choices,
## data = world_happiness_2021)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9892 -0.3735 0.0280 0.4863 1.4558
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.4393 0.4334 -3.321 0.00113 **
## social_support 5.6489 0.5231 10.799 < 2e-16 ***
## freedom_to_make_life_choices 2.9935 0.5303 5.645 8.33e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6402 on 146 degrees of freedom
## Multiple R-squared: 0.6494, Adjusted R-squared: 0.6446
## F-statistic: 135.2 on 2 and 146 DF, p-value: < 2.2e-16
anova(ladder_model)
## Analysis of Variance Table
##
## Response: ladder_score
## Df Sum Sq Mean Sq F value Pr(>F)
## social_support 1 97.785 97.785 238.564 < 2.2e-16 ***
## freedom_to_make_life_choices 1 13.061 13.061 31.866 8.334e-08 ***
## Residuals 146 59.844 0.410
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
yhat <- ladder_model$fitted.values
res <- ladder_model$residuals
t <- rstudent(ladder_model)
stdres <- rstandard(ladder_model)
par(mfrow=c(1,2))
plot(yhat, res, ylab="Residuals", xlab="Fitted Value", main="Residual vs Fitted")
abline(0, 0)
plot(yhat, t, ylab="Studentized", xlab="Fitted Value", main="Studentized Residual vs Fitted")
abline(0, 0)
par(mfrow=c(1,2))
qqnorm(res, ylab="Residuals", xlab="Normal Scores",main="Residual Q-Q Plot")
qqline(res)
qqnorm(t, ylab="Studentized Residuals", xlab="Normal Scores",main="Studentized Residual Q-Q Plot")
qqline(t)
# which country has the highest ladder score?
country_ladder_score <- world_happiness_2021 %>%
group_by(country_name) %>%
summarize(country_name, ladder_score) %>%
arrange(desc(ladder_score)) %>%
distinct()
head(country_ladder_score, 1)
## # A tibble: 1 × 2
## country_name ladder_score
## <chr> <dbl>
## 1 Finland 7.84
ggplot(world_happiness_2021, aes(x = fct_inorder(country_name), y = ladder_score)) +
geom_point(color = "blue", size = 1) +
scale_x_discrete(guide = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(size = 4)) +
xlab("Country") +
ylab("Ladder Score") +
ggtitle("Ladder Score of Each Country")
Next, we will examine the average ladder score of the ten regions.
avg_region <- world_happiness_2021 %>%
group_by(regional_indicator) %>%
mutate(avg_ladder_score = sum(ladder_score) / n()) %>%
mutate(avg_social_support = sum(social_support) / n()) %>%
mutate(avg_ftmlc = sum(freedom_to_make_life_choices) / n()) %>%
summarize(regional_indicator, avg_ladder_score, avg_social_support, avg_ftmlc) %>%
arrange(desc(avg_ladder_score)) %>%
distinct()
## `summarise()` has grouped output by 'regional_indicator'. You can override using
## the `.groups` argument.
regions_model <-lm(avg_ladder_score ~ avg_social_support + avg_ftmlc, data = avg_region)
summary(regions_model)
##
## Call:
## lm(formula = avg_ladder_score ~ avg_social_support + avg_ftmlc,
## data = avg_region)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5989 -0.1561 0.0303 0.2016 0.3885
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.423 1.451 -2.359 0.05039 .
## avg_social_support 9.373 1.849 5.069 0.00145 **
## avg_ftmlc 1.605 2.210 0.726 0.49125
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3481 on 7 degrees of freedom
## Multiple R-squared: 0.8798, Adjusted R-squared: 0.8454
## F-statistic: 25.61 on 2 and 7 DF, p-value: 0.0006026
anova(regions_model)
## Analysis of Variance Table
##
## Response: avg_ladder_score
## Df Sum Sq Mean Sq F value Pr(>F)
## avg_social_support 1 6.1428 6.1428 50.6956 0.0001903 ***
## avg_ftmlc 1 0.0639 0.0639 0.5274 0.4912508
## Residuals 7 0.8482 0.1212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
yhat <- regions_model$fitted.values
res <- regions_model$residuals
t <- rstudent(regions_model)
stdres <- rstandard(regions_model)
par(mfrow=c(1,2))
plot(yhat, res, ylab="Residuals", xlab="Fitted Value", main="Residual vs Fitted")
abline(0, 0)
plot(yhat, t, ylab="Studentized", xlab="Fitted Value", main="Studentized Residual vs Fitted")
abline(0, 0)
par(mfrow=c(1,2))
qqnorm(res, ylab="Residuals", xlab="Normal Scores",main="Residual Q-Q Plot")
qqline(res)
qqnorm(t, ylab="Studentized Residuals", xlab="Normal Scores",main="Studentized Residual Q-Q Plot")
qqline(t)
# which region has the highest average ladder score?
head(avg_region, 1)
## # A tibble: 1 × 4
## # Groups: regional_indicator [1]
## regional_indicator avg_ladder_score avg_social_support avg_ftmlc
## <chr> <dbl> <dbl> <dbl>
## 1 North America and ANZ 7.13 0.934 0.899
ggplot(avg_region, aes(x = fct_inorder(regional_indicator), y = avg_ladder_score)) +
geom_point(color = "blue") +
scale_x_discrete(guide = guide_axis(angle = 60)) +
xlab("Regions") +
ylab("Average Ladder Score") +
ggtitle("Average Ladder Score of Each Region")
Generosity was not a factor into ladder score calculations for our data set.
There is not one single factor that impacts ladder score most.
The region that has the highest ladder score is North America and ANZ and the country with the highest ladder score is Finland.